#160714 - write_taxonomy table + % match from NCBI data!

# Work inside the project's taxonomy folder; all relative paths used by this
# script ("blast_hits/...", the output CSV) are resolved against it.
setwd("~/Documents/UNI_und_VORLESUNGEN/11 phd projects/1 Meta SCHMALNAU/2 HiSeq biomass 160707/8 taxonomy")

# Full paths of the BLAST hit tables. The dot is escaped so that only names
# that really end in ".csv" are picked up (an unescaped "." matches any
# character, e.g. "hitscsv").
plottables <- list.files("blast_hits/blast", full.names = TRUE, pattern = "\\.csv$")

#f <- 153
# Build `data`: one row per BLAST hit table in `plottables` (same order),
# describing the first hit of that table: accession, order / family / species
# from the matching NCBI taxonomy XML, and percent identity of the alignment.
# Tables flagged as empty contribute a row of NAs so rows stay aligned with
# `plottables`.

# Column names of the resulting table.
tax_columns <- c("NCBI_accession", "NCBI_order", "NCBI_Fam", "NCBI_sp", "NCBI_Ident")

# Scientific name found on the line directly above the first
# "<Rank>rank</Rank>" line of `lineage`; NA when that rank is not present.
lineage_name <- function(lineage, rank) {
  rank_line <- paste0("            <Rank>", rank, "</Rank>")
  name_lines <- lineage[which(lineage == rank_line) - 1]
  sub("            <ScientificName>(.*)</ScientificName>", "\\1", name_lines)[1]
}

# Order, family and species (in that order) for the first taxon described by
# `taxo`, the lines of a taxonomy XML file. Order and family come from the
# first <LineageEx> block; they are NA when the file has no such block instead
# of aborting the whole run.
first_taxon <- function(taxo) {
  lineage_start <- grep("    <LineageEx>", taxo)
  lineage_end <- grep("    </LineageEx>", taxo)

  lineage <- character(0)
  top_level <- taxo
  if (length(lineage_start) > 0 && length(lineage_end) > 0) {
    first_block <- lineage_start[1]:lineage_end[1]
    lineage <- taxo[first_block]
    # Blank the lineage entries so their (deeper indented) <ScientificName>
    # lines cannot be picked up by the unanchored species pattern below.
    top_level[first_block] <- NA
  }

  species <- grep("    <ScientificName>.*</ScientificName>", top_level, value = TRUE)
  species <- sub("    <ScientificName>(.*)</ScientificName>", "\\1", species)

  c(lineage_name(lineage, "order"), lineage_name(lineage, "family"), species[1])
}

# Character vector (one entry per column in `tax_columns`) for the first hit
# listed in `hit_file`, or all NA when the file is flagged as empty.
top_hit_row <- function(hit_file) {
  # An empty result is stored as a file whose first line is just `""`; a
  # zero-byte file is treated the same way rather than raising an error.
  first_line <- readLines(hit_file, n = 1L)
  if (length(first_line) == 0 || first_line == "\"\"") {
    return(rep(NA_character_, length(tax_columns)))
  }

  plotdata <- read.csv(hit_file, stringsAsFactors = FALSE)

  # Fraction of identical positions over the alignment length.
  ident <- plotdata$Hsp_identity / plotdata$Hsp_align.len

  # The taxonomy XML sits under the same file name in "blast_hits/blast_taxonomy".
  taxo <- readLines(sub("blast_hits/blast", "blast_hits/blast_taxonomy", hit_file))

  c(
    as.character(plotdata$Hit_accession)[1],
    first_taxon(taxo),
    as.character(round(ident[1] * 100, digits = 2))
  )
}

if (length(plottables) == 0) {
  warning("no BLAST hit tables found; the taxonomy table will be empty", call. = FALSE)
}

# Preallocate instead of growing a data frame with rbind() in the loop.
# Row labels 2..n+1 reproduce the numbering the table had when it was built by
# appending to a placeholder first row, so the written CSV stays unchanged.
hits <- matrix(
  NA_character_,
  nrow = length(plottables),
  ncol = length(tax_columns),
  dimnames = list(as.character(seq_along(plottables) + 1L), tax_columns)
)

for (f in seq_along(plottables)) {
  hits[f, ] <- top_hit_row(plottables[f])
}

data <- as.data.frame(hits, stringsAsFactors = FALSE)


# Pull the text between "OTU_" and ";size" out of every hit-table path and
# convert it to a number.
otu_labels <- sub(".*/OTU_(.*);size.*.csv", "\\1", plottables)
OTUnum <- as.numeric(otu_labels)

# Rearrange the rows of the table into ascending OTU-number order.
by_otu <- order(OTUnum)
data <- data[by_otu, ]

# Save the sorted table in the working directory.
write.csv(data, file = "160714_tax_ncbi.csv")



